#!/usr/bin/env ruby


################################################################################
#    IP/Host Pattern Extractor
#  
#    license: GPL 
#    release date: 2011-03-13
#     
#
#    (c) Aung Khant, http://yehg.net               
#                                                 
#    YGN Ethical Hacker Group, Yangon, Myanmar
#
#    Check for updates via
#    svn checkout http://host-extract.googlecode.com/svn/trunk/ host-extract
#
#    Send bugs, suggestions, contributions to host-extract @ yehg .net
#
################################################################################

require 'net/http'
require 'net/https'
require 'uri'
require 'open-uri'
require 'fileutils'
require 'optparse'
require 'rubygems'
require 'mechanize'

# Request headers passed to Net::HTTP#get in get_url; the 'User-Agent' entry is
# also handed to Mechanize. Edit the value to present a different browser.
$useragent = {'User-Agent'=>'Mozilla/5.0 (Windows; U; Windows NT 7; en-US; rv:1.9.2.15) Gecko/20110303 Firefox/4.0'}

# Decompress an HTTP response body.
#
# string - the compressed payload.
# type   - 'deflate' (default) or 'gzip'; any other value yields ''.
#
# Returns the decompressed String, or '' for an unsupported type.
# Raises Zlib::Error subclasses when the payload is corrupt.
def decompress(string, type='deflate')
  # Loaded lazily so the script only pays for zlib when a body is compressed.
  require 'zlib'
  require 'stringio'

  case type
  when 'deflate'
    begin
      # Zlib-wrapped stream; the class method finishes and closes the stream itself.
      Zlib::Inflate.inflate(string)
    rescue Zlib::DataError
      # Some servers send a raw deflate stream without the zlib header;
      # negative window bits tell zlib not to expect one.
      raw = Zlib::Inflate.new(-Zlib::MAX_WBITS)
      begin
        raw.inflate(string)
      ensure
        raw.close
      end
    end
  when 'gzip'
    reader = Zlib::GzipReader.new(StringIO.new(string))
    begin
      reader.read
    ensure
      # Release the zlib state even when the payload is truncated.
      reader.close
    end
  else
    ''
  end
end

# Build a short "source" snippet showing where +needle+ occurs in +haystack+.
#
# haystack - text to search (split into lines on "\n").
# needle   - literal text to look for; matched case-insensitively.
#
# Returns "\n      #source: .. <snippet>\n" for the first line containing the
# needle (snippet is lower-cased and at most 60 characters), or '' when no line
# contains it.
def snip_str(haystack,needle)
  needle = needle.to_s.downcase
  haystack.to_s.split("\n").each do |line|
    line = line.downcase
    # Literal substring search: the needle is a host/IP, so '.' must not act
    # as a regexp wildcard and characters such as '(' must not raise.
    pos = line.index(needle)
    next if pos.nil?

    if pos > 12
      # Start 11 characters before the hit to give some leading context.
      snippet = line[pos - 11, pos + 40]
    else
      snippet = line
    end
    # Lines were split on "\n", so a CRLF source leaves a lone "\r" behind.
    snippet = snippet[0, 60].delete("\r\n")
    return "\n      #source: .. #{snippet}\n"
  end
  ''
end


# Fetch +url+, scan the response for internal IP addresses (and optionally for
# IP/host/domain patterns), print the findings and append them to $outfile.
#
# url          - String URL to request.
# header_check - when true the scanned text is built from the response headers
#                instead of the body, and the IP/domain pattern search is skipped.
#
# Reads the globals $outfile, $useragent, $scheme, $domain, $findall and
# $viewsource. URLs matching the blacklist regexp below are skipped. 301/302
# responses are followed by calling get_url again on the Location header.
# Exceptions are rescued at the bottom and most of them are reported on stdout.
def get_url(url,header_check=false)   
  begin    
    
    # Skip well-known third-party libraries / trackers.
    blacklist = url.to_s.scan(/jquery|yui\.|font\.js|mootools|googlesyndication|google\-analytics.com|addthis/)
    
    if blacklist.size > 0
        return
    end
    
    # Append to an existing report (with a separator) or start a new one with a header.
    if File.exist?$outfile
        fout = File.new($outfile,"a")
        fout.puts("\n#########################################################################\n\n")
    else
        fout = File.new($outfile,"w")        
        fout.puts("# Generated by host-extract (c) Aung Khant, http://yehg.net/lab/\n# Send bugs/suggestions to host-extract @ yehg.net\n# Date: #{Time.now.strftime("%Y-%m-%d %H:%M:%S")}\n# --------------------------------------------------------------------------\n\n")  
    end
    
    puts  
    fout.puts("\n# URL: " + url)
    uri = URI.parse(url)
    
    puts 
    host = uri.host
    puts 'host: ' + host    
    
    path = uri.path
    path = '/'  if uri.path == nil or uri.path == ""
    path = path.gsub("//",'/')
    puts 'path: ' + path
    
    query = uri.query
    query = ''  if uri.query == nil
    puts 'query: ' + query unless uri.query == nil or  uri.query == ''
    
    
    puts
    http = Net::HTTP.new(uri.host,uri.port)
    http.read_timeout = 100
    http.open_timeout = 80
    http.use_ssl= true if uri.scheme == "https"
    # Certificate verification is switched off for https targets.
    http.verify_mode = OpenSSL::SSL::VERIFY_NONE if uri.scheme == "https"
    
    path = fix_uri(path)
    # NOTE(review): this destructuring expects http.get to hand back both the
    # response and the body (believed to be Ruby 1.8 behaviour). On newer Rubies
    # body is presumably nil here, which would make body.length below raise - confirm.
    req,body = http.get(path+'?'+query,$useragent)
    
    # Follow redirects by recursing on the Location header.
    if req.code =~ /(301|302)/        
            puts "-> #{url} | #{req.code.to_s}\n(Redirected to : " + req.header["location"]  + ")\n\n"
            fout.puts("# -> #{url} | #{req.code.to_s}\n# (Redirected to : " + req.header["location"]  + ")\n\n")
            
            # A Location without a leading "http" is prefixed with $scheme + $domain.
            unless req.header["location"] =~ /^http/i
                fout.close
                sleep(1)
                get_url($scheme + $domain + req.header["location"])
                # NOTE(review): fout was already closed a few lines above.
                fout.close
            else
                fout.close
                sleep(1)
                get_url(req.header["location"])
            end
                
    end
    
    if req['Content-Encoding'] =~ /gzip|deflate/
        body = decompress(body,req['Content-Encoding'])
    end

    
    # Only responses whose status code starts with 2 or 5 are scanned.
    if /^(2|5)/.match(req.code.to_s) 
      # NOTE(review): this early return leaves fout open.
      if body.length < 5
        return 
      end
      
      puts      
      
      if header_check == true
          header_value = ''
          # NOTE(review): header lines are only collected when req is an Array;
          # if req is a single response object header_value stays '' - confirm.
          if req.class.to_s == "Array"
            req.each do |r|            
            header_value =  r + ': ' + req[r].to_s + "\n" + header_value
            end
          end
          # From here on the header text replaces the body as the scanned text.
          body = header_value

          puts "[*] searching for internal IP patterns in HTTP Headers ...\n\n"
          fout.puts("\n# [*] searching for internal IP patterns in HTTP Headers ...\n\n")
        
      else
          puts "[*] searching for internal IP patterns ...\n\n"
          fout.puts("\n# [*] searching for internal IP patterns ...\n\n")
      end

        
      # Alternatives for 192.168.x.x, 172.<n>.x.x, 127.0.0.1 and 10.x.x.x. Because the
      # pattern has capture groups, scan yields one array of groups per match.
      # NOTE(review): the optional second digit in the 172 alternative also lets
      # 172.1.x.x and 172.2.x.x match.
      int_ips = body.scan(/(192\.168\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))|(172\.(1[6-9]?|2[0-9]?|3[0-1])\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))|(127\.0\.0\.1)|(10\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]))|(10\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))/im)
       
      int_ips.compact!
      int_ips.uniq! 
      
      if int_ips.size == 0
          puts "[x] no internal IP(s) found"
          fout.puts("# [x] no internal IP(s) found\n\n")
      else            
          valid_ips = 0  
          # Exactly one match: print every capture group that contains a dot.
          if int_ips.size == 1
             found = 0
             int_ips.each do |ipx|
               ipx.each do |ip|
                  if ip.to_s =~ /\./ 
                     
                      # Skip candidates with zero-octet patterns such as a trailing ".0".
                      unless ip.to_s =~ /(:00|\d{1,3}\.000|\.0$|00\.0\.0\.0|\.0\.0\.0|0\.0\.0\.0|\.0\.0$)/
                          print "  - " 
                          
                          if $viewsource != 0
                              print ip + snip_str(body,ip)
                              fout.puts(ip + snip_str(body,ip)+ "\n")
                          else
                              print ip 
                              fout.puts(ip+ "\n")
                          end
                          puts 
                          found = 1
                          valid_ips = 1 + valid_ips
                      end
                  end
               end
             end
            
             # Fallback when nothing was printed above: join the remaining
             # capture groups with '.' and apply the same filter.
             ipx = int_ips.compact
             ipx[0].compact!
             ipx[0].uniq!         
             if found == 0
                ip = ipx.join('.')
                
                unless ip.to_s =~ /(:00|\d{1,3}\.000|\.0$|00\.0\.0\.0|\.0\.0\.0|0\.0\.0\.0|\.0\.0$)/
                    print "  - " 
                    
                    if $viewsource != 0
                        print ip + snip_str(body,ip)
                        fout.puts(ip + snip_str(body,ip)+ "\n")
                    else
                        print ip 
                        fout.puts(ip+ "\n")
                    end                
                    puts 
                    found = 1
                    valid_ips = 1 + valid_ips
                end
             end
          end 
          # Several matches: same per-group printing, without the join fallback.
          if int_ips.size > 1         
              int_ips.each do |ipx|
                 ipx.each do |ip|
                    if ip.to_s =~ /\./    
                        unless ip.to_s =~ /(:00|\d{1,3}\.000|\.0$|00\.0\.0\.0|\.0\.0\.0|0\.0\.0\.0|\.0\.0$)/        
                            print "  - " 
                            if $viewsource != 0
                                print ip + snip_str(body,ip)
                                fout.puts(ip + snip_str(body,ip) + "\n")
                            else
                                print ip 
                                fout.puts(ip+ "\n")
                            end
                            puts 
                            found = 1
                            valid_ips = 1 + valid_ips
                        end
                    end
                 end
              end
          end
        
          if valid_ips > 0
             puts "\n[*] " + valid_ips.to_s + ' internal IP(s) found!'
             fout.puts("\n# [*] " + valid_ips.to_s + " internal IP(s) found!\n\n")
             puts
          else
             puts "[x] no internal IP(s) found"
             fout.puts("# [x] no internal IP(s) found\n\n")
          end
        
      end
    
    # Optional second pass (-a): generic IP / host / domain patterns in the body.
    if $findall != 0 and header_check != true
        puts
        puts '[*] searching for IP/domain patterns ...'
        fout.puts("\n# [*] searching for IP/domain patterns ...\n\n")
        a1 = body.scan(/([a-zA-Z0-9]+([\-\.]{1}[a-zA-Z0-9]+)*\.((au|de|fr|co\.jp|jp|uk|co\.uk|es|cn|it|com|net|org|info)\.([a-zA-Z]{2,3}+)|de|fr|co\.jp|jp|uk|co\.uk|es|cn|it|com|net|org|info|arpa|localdomain|localhost)(:[0-9]{1,5})?(\/)?|[a-zA-Z]{4,}[0-9]{0,}:[0-9]{2,5}|((25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?))|[a-zA-Z]{4,}[0-9]{0,}:[0-9]{2,5})/ixm)
        r1 = []
        
        if a1.size == 0
            puts "[x] no relevant IP/Domain(s) found"
            fout.puts("# [x] no relevant IP/Domain(s) found")
        end
        a1.uniq!
        a1.compact!
        
        # Keep the first capture group (the whole match) of each result.
        # NOTE(review): the loop runs a1.size-1 times, so the last element of a1
        # is never copied into r1.
        (a1.size-1).times.each do |i|
            r1 << a1[i][0] unless a1[i][0]  == nil or a1[i][0] == ''
        end 
        rn = 0
        r2 = []
        r1.compact!
        r1.uniq!        
        puts 
        
        r1.each do |r|            
            # common false positives 

            unless r =~ /(wdith:|wieght:|hieght:|heeight:|remove:|timespan:|rank:|position:|top:|mapentry:|left:|bottom:|right:|padding:|margin:|index:|size:|version:|ver:|w3\.org|www\.adobe\.com|download\.macromedia\.com|char|span\.|delay:|open:|close:|rate:|out:|post(s?):|photo(s?):|comment(s?):|key(s?):|group(s?):|profile(s?):|exp:|Arbiter\.info|code:|id:|\info\.com|interval:|time:|timer:|freq:|indent:|margin:|count:|navbar\.com|document\.com|expire(s?)\:|^fact\:|^slow\:|^fast\:|^default\:|^time(r?)\:|^(tr|div|td|form|img)\.com|^align:|widows:|^heidht:|color:|counter:|^name:|asp\.net|window\.net|window\.com|price:|news:|heeight:|timespan:|^spam:|^month:|^wait:|^deletion:|^radius:|date:|minh:|^built:|^gfnt:|^gimp:|^channel:|this\.options\.com|price:|row:|self\.info|div\.info|lbgc:|pageinfo:|ewtax:|number\:|column:|rows:|this\.com|:00|\d{1,3}\.000|\.0$|this\.|00\.0\.0\.0|00\.00\.00|\.0\.0\.0|0\.0\.0\.0|\.0\.0$|^252f|^2f|^3a|background:|show:|background\-attachment:|background\-color:|background\-image:|background\-position:|background\-repeat:|border:|border\-bottom:|border\-bottom\-color:|border\-bottom\-style:|border\-bottom\-width:|border\-color:|border\-left:|border\-left\-color:|ajax:|token:|border\-left\-style:|border\-left\-width:|border\-right:|border\-right\-color:|border\-right\-style:|border\-right\-width:|border\-style:|border\-top:|border\-top\-color:|border\-top\-style:|border\-top\-width:|border\-width:|outline:|outline\-color:|outline\-style:|outline\-width:|height:|max\-height:|max\-width:|min\-height:|min\-width:|width:|font:|font\-family:|font\-size:|font\-style:|font\-variant:|font\-weight:|content:|counter\-increment:|counter\-reset:|quotes:|list\-style:|list\-style\-image:|list\-style\-position:|list\-style\-type:|margin:|margin\-bottom:|margin\-left:|margin\-right:|margin\-top:|padding:|padding\-bottom:|padding\-left:|padding\-right:|padding\-top:|bottom:|number:|clear:|clip:|cursor:|display:|float:|left:|overflow:|position:|right:|top:|visib
ility:|z\-index:|orphans:|page\-break\-after:|page\-break\-before:|page\-break\-inside:|widows:|border\-collapse:|border\-spacing:|weight:|caption\-side:|empty\-cells:|length:|table\-layout:|color:|direction:|letter\-spacing:|line\-height:|text\-align:|text\-decoration:|text\-indent:|text\-shadow:|text\-transform:|unicode\-bidi:|vertical\-align:|white\-space:|word\-spacing|return:|botto:|^down:|^up:|slide:|categ:|fece05:|free:|front:|show:|full:|start:|free6:|mootools:|head:|popup:|sledzik:|gatrack:|base:|visiblerows:|speed:|zone:|blog:|diff:|aries:|taurus:|gemini:|cancer:|leo:|virgo:|aquarius:|capricorn:|sagittarius:|scorpio:|libra:|pisces:|duration:|ewtax:|c\.jp|image(s?):|videos(s?):|lock:|retry(s?):|maxsel:|widt:|screen:|shift:|thresh:|control:|meta:|enter:|pause:|millis:|a\.info|console\.info|movie(s?):|page(s?):|trove:|bullet:|logo:|comma:|delete:|escape:|home:|insert:|decimal:|divide:|page|hpad|vpad|multiply:|subtract:|period:|space:|sensitivity:|tolerance:|s\.com|e\.com|area:|steps:)/mi
                # Drop everything from the first '/' onwards.
                if r =~ /\//
                  r = r[0,r.index('/')]
                  
                end
                snipped = snip_str(body,r) 
                # Second filter: reject the candidate based on the text directly
                # before or after it in the source snippet.
                unless snipped  =~ /([0-9]\.|:|V|Version" |Version%3d|effects\-|\?v=|version\-|version |\/v|version=|Player |Rescue Disk |version:|version\s:|\%|version\s|ver=|ver\s|ver:|jquery\-|\.js\?|\.jar\?|\.css\?)#{r}/im  or snipped  =~ /#{r}(\-compress|\.min\.|\sbeta|\.exe|\.js|:|\.css|\-more\.|\.jpg|\/prot|\.png|\.ico|\.gif|g"|g'|[a-z]|[0-9]|\.[0-9]|-_-|', false, flashvars)/im
                
                    rt1 = ''
                    r.downcase!
                    # rt1 is the host without its leading "www.".
                    if r =~ /^www\./im
                      rt1 = r[r.index('www.')+4,r.length]  
                      rt1.downcase!
                    end 
                    # A www. host is added unless its bare form is already in r2.
                    unless rt1 == ''
                        unless r2.include?rt1                        
                            r2 << r
                        end 
                    else
                        r2 << r
                    end
                                    
                end
            end
        end
        
        ####### enum -> link, script, a, iframe, frame  ######################
        
        if ftype(path).to_s.length == 0 or query !~ /title=MediaWiki:Print.css/im
            
            # The URL is requested a second time through Mechanize so the markup can be queried.
            amech = Mechanize.new { |agent|
                agent.user_agent = $useragent['User-Agent']
            }
            page = amech.get(url)     
            @insite_links = []
            
            
            # NOTE(review): page is whatever Mechanize#get returned; whether =~ performs
            # a text match on that object is not visible here - confirm this branch runs.
            if page =~ /<html/im
            
                # For each element type below, add the host of every absolute (http...) URL to r2.
                scriptsrc = page.search("//script[@src]").map{ |s| s['src'] }
                if scriptsrc.size > 0
                  scriptsrc.each do |s|
                       if s =~ /\.js/i
                              if s =~ /^http/i  
                                 uri1 = URI.parse(s)
                                 r2 << uri1.host if uri1.host != nil
                              end
                       end
                  end
                end 
            
                cssrc = page.search("//link[@href]").map{ |s| s['href'] }
                if cssrc.size > 0
                    cssrc.each do |cs|  
                            if cs =~ /\.css/i          
                                if cs =~ /^http/i  
                                    uri2 = URI.parse(cs)
                                    r2 << uri2.host if uri2.host != nil
                                end
                            end
                    end      
                end 
                ahref = page.search("//a[@href]").map{ |s| s['href'] }
                ahref.each do |a|
                    if a =~ /^http/i 
                        uri3 = URI.parse(a)
                        r2 << uri3.host if uri3.host != nil                    
                    end
                end
                
                iframesrc = page.search("//iframe[@src]").map{ |s| s['src'] }
                iframesrc.each do |i|
                    if i =~ /^http/i 
                        uri4 = URI.parse(i)
                        r2 << uri4.host if uri4.host != nil                    
                    end
                end
                
                framesrc = page.search("//frame[@src]").map{ |s| s['src'] }
                framesrc.each do |f|
                    if f =~ /^http/i 
                        uri5 = URI.parse(f)
                        r2 << uri5.host if uri5.host != nil                    
                    end
                end
            end
        end
        ####################################################
          
        r2.compact!
        r2.uniq!
        rn1 = 0
        r2.each do |rx|
            # NOTE(review): "rx != nil or rx != ''" is true for every value, so this
            # condition never filters anything (the length checks are short-circuited).
            if rx != nil or rx != '' or rx.to_s.length > 4 and rx.length < 60
                # Skip dotted quads that start with a zero.
                unless rx =~ /^0+/ and rx =~ /\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}/m 
                    rn1 = rn1 + 1
                    if $viewsource != 0
                        print "  - " + rx + snip_str(body,rx)
                        fout.puts(rx + snip_str(body,rx)+ "\n")
                    else
                        print "  - " + rx
                        fout.puts(rx+ "\n")
                    end
                    puts
                end 
            end
        end

        
        puts "\n[*] total IP/Host pattern(s): " + rn1.to_s 
        fout.puts("\n# [*] total IP/Host pattern(s): " + rn1.to_s)
        # NOTE(review): apart from the redirect branch this is the only place fout is
        # closed; when $findall is 0 or header_check is true it is left open.
        fout.close
    end
    elsif req.code == "404"
      #uncomment if you want
      puts "[!404] #{url} - wrong path "
      #puts
    end
  rescue Exception=>err
    # "end of file reached" errors are ignored silently; everything else is
    # reported, followed by a dump of line numbers / method names from the backtrace.
    if err.message !~ /end of file reached/
        if err.message =~ /execution expired/
            puts "\nERROR: #{url}\nthe server does not respond fast enough\ntry again later for more accurate result."
        elsif err.message =~ /closed stream/
            puts ''
        else
            puts "\nERROR: #{url}\n#{err.message}\n"
        end
        p err.backtrace.map{ |x|   
            x.match(/^(.+?):(\d+)(|:in `(.+)')$/); 
            ['line:' + $2,$4] 
        }

        
    end
    
  end  
end

# Classify a URL path by its file extension.
#
# p - path to inspect; surrounding whitespace is ignored. The argument itself
#     is left untouched, so frozen strings and nil are accepted.
#
# Returns 'js|css' when the path ends in .js or .css (case-insensitive),
# otherwise ''.
def ftype(p)
  # Work on a stripped copy instead of calling strip! on the caller's string.
  path = p.to_s.strip
  path =~ /\.(js|css)$/im ? 'js|css' : ''
end

# Print the program banner (title, author and checkout command) framed by two
# rule lines, preceded by one blank line and followed by one blank line.
def banner
  text = [
    '',
    '==================================================================',
    'IP/Host Pattern Extractor (c) Aung Khant, aungkhant[at]yehg.net',
    '  YGN Ethical Hacker Group, Myanmar, http://yehg.net/',
    '',
    'svn co http://host-extract.googlecode.com/svn/trunk/ host-extract',
    '=================================================================='
  ].join("\n")
  puts text + "\n\n"
end

# Print the usage/help text and terminate the process immediately.
#
# s - text to print.
#
# Never returns. exit! is called without an argument, as before, so the exit
# status is unchanged.
def usage(s)
    puts s
    # exit! skips the interpreter's normal shutdown, so push any buffered
    # output out first; otherwise the text can be lost when stdout is a
    # pipe or a file.
    $stdout.flush
    exit!
end

# Percent-encode a URL path.
#
# u - value to encode; converted with to_s first, so nil becomes ''.
#
# Every character other than letters, digits and - _ . ! ~ * ' ( ) / @ $ is
# replaced by %XX for each of its bytes. '%' itself is encoded too, so input
# that is already escaped gets escaped again (same as before).
#
# Returns the encoded String (US-ASCII).
def fix_uri(u)
    # URI.escape no longer exists on Ruby 3.0+, so the same byte-wise
    # encoding is done here with the same set of safe characters.
    escaped = u.to_s.gsub(%r{[^\-_.!~*'()a-zA-Z0-9\d/@$]}) do |unsafe|
        unsafe.each_byte.map { |byte| format('%%%02X', byte) }.join
    end
    escaped.force_encoding(Encoding::US_ASCII)
end

# Turn a src/href value taken from the target page into an absolute URL.
#
# ref     - attribute value as found in the markup.
# uri     - parsed URI of the target page.
# cur_dir - directory part of the target page's path (no trailing slash).
#
# Returns the absolute URL as a String.
def resolve_asset_url(ref, uri, cur_dir)
    # Protocol-relative reference: reuse the scheme of the page it came from.
    return uri.scheme + ':' + ref if ref =~ /^\/\//
    # Already absolute. Requiring "://" keeps relative names such as
    # "http-utils.js" from being mistaken for absolute URLs.
    return ref if ref =~ /^https?:\/\//i

    origin = uri.scheme + '://' + uri.host
    # Keep a non-default port so assets are fetched from the same server.
    origin += ':' + uri.port.to_s if uri.port && uri.port != uri.default_port

    if ref =~ /^\//
        # Root-relative path: resolve against the host, not the page directory.
        origin + ref
    else
        origin + cur_dir + '/' + ref
    end
end

# Command-line entry point.
#
# Parses the -a/-j/-c/-v switches, takes the target URL from ARGV[0] (prefixing
# "http://" when it does not start with "http"), sets the globals read by
# get_url ($domain, $scheme, $outfile, $findall, $viewsource), scans the
# target's body and headers, and - when -c / -j are given - also scans the
# CSS / JS files referenced by the page.
def main

    options = { :findall => false, :js => false, :css => false, :viewsource => false }

    parser = OptionParser.new do |opts|
        opts.on('-a','find all ip/host patterns') { options[:findall] = true }
        opts.on('-j','scan all js files') { options[:js] = true }
        opts.on('-c','scan all css files') { options[:css] = true }
        opts.on('-v','append view-source html snippet for manual verification') { options[:viewsource] = true }
    end

    begin
        parser.parse!
    rescue OptionParser::ParseError => e
        # Show the problem and the help text instead of a stack trace.
        usage(e.message + "\n" + parser.to_s)
    end

    banner()

    url = ARGV[0] || usage(parser.to_s)
    url = 'http://' + url if url !~ /^http/

    uri = URI.parse(url)
    # Without a host the output file name below cannot be built.
    usage("invalid URL: " + url + "\n" + parser.to_s) if uri.host.nil?

    path = uri.path
    path = '/' if path.nil? || path == ''
    cur_dir = path[0,path.rindex('/')]

    $domain = uri.host
    $scheme = 'http://' if url =~ /^http:/i
    $scheme = 'https://' if url =~ /^https:/i

    $outfile  = 'host-extract_' + $domain + '-' +  Time.now.strftime("%Y-%m-%d_%H-%M-%S") + '.txt'

    # get_url compares these against 0, so "off" has to be 0 rather than false.
    $findall = options[:findall] ? options[:findall] : 0
    $viewsource = options[:viewsource] ? options[:viewsource] : 0

    puts 'Target: ' + url

    # First pass scans the body, second pass the response headers.
    get_url(url)
    get_url(url,true)

    path = fix_uri(path)

    # Referenced assets are only enumerated when the target itself is not a js/css file.
    if ftype(path).to_s.length == 0

        amech = Mechanize.new { |agent|
            agent.user_agent = $useragent['User-Agent']
        }
        page = amech.get(url)
        @insite_links = []

        scriptsrc_ind = []
        cssrc_ind = []

        # Only objects that can be queried are searched for script/link tags.
        if page.respond_to?(:search)
            page.search("//script[@src]").map{ |s| s['src'] }.each do |s|
                scriptsrc_ind << resolve_asset_url(s, uri, cur_dir) if s =~ /\.js/i
            end

            page.search("//link[@href]").map{ |s| s['href'] }.each do |cs|
                cssrc_ind << resolve_asset_url(cs, uri, cur_dir) if cs =~ /\.css/i
            end
        end

        if options[:css]
            puts "\n[*] Scanning CSS files .."
            cssrc_ind.each do |cu|
                cu = cu.gsub('/index.php/','') #wiki url
                puts "\n\n[*] URL: " + cu + ' ...'
                get_url(cu)
            end
            puts "\n[*] Done -> Scanning CSS files .."
        end

        if options[:js]
            puts "\n[*] Scanning JS files .."
            scriptsrc_ind.each do |u|
                u = u.gsub('/index.php/','') #wiki url
                puts "\n\n[*] URL: " + u + ' ...'
                get_url(u)
            end
            puts "\n[*] Done -> Scanning JS files .."
        end

    end

    puts "\n# Send bugs & suggestions to host-extract @ yehg.net"

end

# Run the tool only when this file is executed directly, not when it is loaded.
main() if __FILE__ == $0

